import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
import numpy as np
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
# Importing pre-processed dataset
diabetes_df = pd.read_csv('processed_diabetes5050_dataset.csv')
# Acquiring target values and attributes
y = diabetes_df['Diabetes_binary'].values
y = np.float32(y)  # cast to float32, the default dtype torch tensors are built from
X = diabetes_df.drop(columns=['Diabetes_binary']).values
X = np.float32(X)
input_dim = X.shape[1] # NN input dimension = number of attributes
# Performing stratified train test split where test set is 30% of dataset
# NOTE: the split itself is fixed by random_state=42 below; this call seeds
# NumPy's *global* RNG, presumably to pin any later NumPy-based randomness.
np.random.seed(0)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42, shuffle=True, stratify=y)
# Saving train and test files so records are kept, and the test sets can be used in the final model notebook
np.save('final_models/X_train.npy', X_train)
np.save('final_models/X_test.npy', X_test)
np.save('final_models/y_train.npy', y_train)
np.save('final_models/y_test.npy', y_test)
# Copying values to avoid changes in datatype
# Each model gets its own copies so that, e.g., the SVM's standardisation
# further down cannot mutate the arrays the MLP trains on.
X_train_MLP = np.copy(X_train)
X_test_MLP = np.copy(X_test)
y_train_MLP = np.copy(y_train)
y_test_MLP = np.copy(y_test)
X_train_SVM = np.copy(X_train)
X_test_SVM = np.copy(X_test)
y_train_SVM = np.copy(y_train)
y_test_SVM = np.copy(y_test)
class MLP_network(nn.Module):
    """Feed-forward MLP for binary classification.

    Emits raw logits (no sigmoid); intended to be trained with a loss that
    applies the sigmoid itself, e.g. ``BCEWithLogitsLoss``.

    Args:
        input_dim: number of input features (default is bound to the
            module-level ``input_dim`` at class-definition time).
        hidden_dim: width of every hidden layer.
        output_dim: number of output units (1 logit for binary tasks).
        extra_hidden_layers: hidden layers added on top of the first one.
        dropout: dropout probability applied after each ReLU. With the
            default 0 no Dropout layer is inserted at all, so layer indices
            and state_dict keys stay identical to the previous behaviour.
    """
    def __init__(self, input_dim=input_dim, hidden_dim=10, output_dim=1, extra_hidden_layers=0, dropout=0):
        super(MLP_network, self).__init__()
        layers = [] # list of layers in network
        layers.append(nn.Linear(input_dim, hidden_dim, bias=True)) # linear connections
        layers.append(nn.ReLU(inplace=True)) # activation function
        if dropout > 0:
            # BUGFIX: `dropout` was previously accepted but never used
            layers.append(nn.Dropout(dropout))
        for i in range(extra_hidden_layers):
            layers.append(nn.Linear(hidden_dim, hidden_dim, bias=True))
            layers.append(nn.ReLU(inplace=True)) # activation function
            if dropout > 0:
                layers.append(nn.Dropout(dropout))
        layers.append(nn.Linear(hidden_dim, output_dim, bias=True)) # linear connection to output layer
        self.output = nn.Sequential(*layers)

    def forward(self, X):
        # sigmoid activation function is applied in loss function, so it is excluded here
        return self.output(X)
from skorch.classifier import NeuralNetBinaryClassifier
import optuna
from optuna.integration import SkorchPruningCallback
def mlp_objective(trial):
    """Optuna objective for the MLP: sample hyperparameters, train, score.

    Returns the accuracy of the trained network on the held-out test set.

    NOTE(review): scoring trials on the *test* set leaks it into model
    selection; cross-validation (or skorch's internal validation split)
    on the training set would be methodologically safer — confirm before
    reusing these results.
    """
    hidden_dim = trial.suggest_int('hidden_dim', 4, input_dim*2)
    extra_hidden_layers = trial.suggest_int('extra_hidden_layers', 0, 5) # already one hidden layer in the model
    batch_size = trial.suggest_categorical('batch_size', [64, 128, 256, 512])
    # Learning rate and weight decay span three orders of magnitude, so they
    # are sampled on a log scale rather than uniformly.
    learning_rate = trial.suggest_float('learning_rate', 0.0001, 0.1, log=True)
    momentum = trial.suggest_float('momentum', 0.5, 1)  # tuned like the other hyperparameters
    weight_decay = trial.suggest_float('weight_decay', 0.0001, 0.1, log=True)
    max_epochs = trial.suggest_int('max_epochs', 50, 200, 10)  # in steps of 10 epochs
    torch.manual_seed(0)  # reproducible weight init / batch shuffling per trial
    net = NeuralNetBinaryClassifier(
        # BUGFIX: pass the class, not an instance — skorch instantiates the
        # module itself when module__* parameters are supplied
        MLP_network,
        module__hidden_dim=hidden_dim,
        module__extra_hidden_layers=extra_hidden_layers,
        criterion=torch.nn.BCEWithLogitsLoss,  # applies sigmoid; the net outputs raw logits
        optimizer=torch.optim.SGD,
        optimizer__momentum=momentum,
        optimizer__lr=learning_rate,
        optimizer__weight_decay=weight_decay,
        max_epochs=max_epochs,
        batch_size=batch_size,
        # prune unpromising trials based on validation accuracy per epoch
        callbacks=[SkorchPruningCallback(trial, 'valid_acc')],
    )
    net.fit(X_train_MLP, y_train_MLP)
    y_pred = net.predict(X_test_MLP)
    return accuracy_score(y_test_MLP, y_pred)
# Hyperparameter search for the MLP: 200 optuna trials maximising test
# accuracy, with unpromising trials cut short by the median stopping rule
# after a 10-step warm-up.
mlp_study = optuna.study.create_study(
    direction="maximize",
    pruner=optuna.pruners.MedianPruner(n_warmup_steps=10),
)
mlp_study.optimize(mlp_objective, n_trials=200)
mlp_best_params = mlp_study.best_params
print('Hyperparameters for best trial:', mlp_best_params)
Hyperparameters for best trial: {'hidden_dim': 23, 'extra_hidden_layers': 3, 'batch_size': 128, 'learning_rate': 0.026645655560194527, 'momentum': 0.7281967530014857, 'weight_decay': 0.0003253167822144232, 'max_epochs': 190}
import plotly
# Visualising the MLP study: per-trial learning curves, a slice plot for
# every hyperparameter, and the optimisation history across all trials.
optuna.visualization.plot_intermediate_values(mlp_study).show()
optuna.visualization.plot_slice(
    mlp_study,
    params=['hidden_dim', 'extra_hidden_layers', 'learning_rate', 'momentum',
            'weight_decay', 'max_epochs', 'batch_size'],
).show()
optuna.visualization.plot_optimization_history(mlp_study).show()
Best hyperparameters found:
'hidden_dim': 23, 'extra_hidden_layers': 3, 'batch_size': 128, 'learning_rate': 0.026645655560194527, 'momentum': 0.7281967530014857, 'weight_decay': 0.0003253167822144232, 'max_epochs': 190
torch.manual_seed(0)  # reproducible weight initialisation for the final fit
# Rebuild the classifier with the best hyperparameters found by the study
# and refit it on the full MLP training set.
best = mlp_best_params
net_final = NeuralNetBinaryClassifier(
    MLP_network(),
    module__hidden_dim=best['hidden_dim'],
    module__extra_hidden_layers=best['extra_hidden_layers'],
    criterion=torch.nn.BCEWithLogitsLoss,
    optimizer=torch.optim.SGD,
    optimizer__momentum=best['momentum'],
    optimizer__lr=best['learning_rate'],
    optimizer__weight_decay=best['weight_decay'],
    max_epochs=best['max_epochs'],
    batch_size=best['batch_size'],
)
net_final.fit(X_train_MLP, y_train_MLP) # Fitting final model
epoch train_loss valid_acc valid_loss dur
------- ------------ ----------- ------------ ------
1 0.6093 0.7116 0.5574 0.6945
2 0.5596 0.7285 0.5393 0.6621
3 0.5466 0.7373 0.5306 0.6601
4 0.5381 0.7436 0.5209 0.6651
5 0.5335 0.7457 0.5188 0.6531
6 0.5302 0.7478 0.5170 0.6762
7 0.5284 0.7475 0.5156 0.7132
8 0.5275 0.7482 0.5143 0.6621
9 0.5265 0.7482 0.5169 0.6701
10 0.5257 0.7484 0.5159 0.6792
11 0.5245 0.7500 0.5151 0.6812
12 0.5237 0.7503 0.5143 0.7142
13 0.5229 0.7513 0.5141 0.6772
14 0.5225 0.7518 0.5138 0.6711
15 0.5221 0.7504 0.5142 0.7022
16 0.5216 0.7513 0.5128 0.6631
17 0.5214 0.7503 0.5144 0.6882
18 0.5211 0.7501 0.5120 0.6802
19 0.5209 0.7513 0.5116 0.6822
20 0.5204 0.7507 0.5121 0.6802
21 0.5200 0.7508 0.5109 0.6912
22 0.5195 0.7506 0.5127 0.6912
23 0.5198 0.7512 0.5111 0.6701
24 0.5194 0.7519 0.5114 0.6832
25 0.5191 0.7513 0.5110 0.6782
26 0.5189 0.7515 0.5112 0.6772
27 0.5187 0.7513 0.5116 0.6872
28 0.5187 0.7512 0.5112 0.6832
29 0.5185 0.7506 0.5099 0.6862
30 0.5179 0.7516 0.5104 0.6892
31 0.5180 0.7529 0.5101 0.6812
32 0.5178 0.7531 0.5098 0.6992
33 0.5175 0.7525 0.5105 0.6782
34 0.5176 0.7528 0.5093 0.6992
35 0.5176 0.7530 0.5099 0.6822
36 0.5175 0.7533 0.5094 0.6962
37 0.5176 0.7518 0.5100 0.6661
38 0.5175 0.7501 0.5094 0.6772
39 0.5172 0.7519 0.5109 0.6992
40 0.5173 0.7522 0.5097 0.6902
41 0.5169 0.7500 0.5093 0.6872
42 0.5169 0.7501 0.5090 0.6872
43 0.5167 0.7503 0.5092 0.7162
44 0.5166 0.7495 0.5088 0.6982
45 0.5164 0.7502 0.5087 0.6721
46 0.5164 0.7506 0.5096 0.6691
47 0.5161 0.7516 0.5093 0.6962
48 0.5164 0.7511 0.5080 0.7062
49 0.5160 0.7505 0.5085 0.6872
50 0.5159 0.7509 0.5082 0.6992
51 0.5160 0.7507 0.5078 0.7032
52 0.5158 0.7513 0.5086 0.6792
53 0.5156 0.7519 0.5079 0.6892
54 0.5157 0.7516 0.5080 0.7052
55 0.5158 0.7518 0.5077 0.7192
56 0.5155 0.7513 0.5074 0.6992
57 0.5156 0.7518 0.5079 0.7392
58 0.5155 0.7508 0.5078 0.7132
59 0.5156 0.7512 0.5075 0.7102
60 0.5156 0.7520 0.5070 0.7122
61 0.5154 0.7506 0.5070 0.6982
62 0.5155 0.7519 0.5072 0.6952
63 0.5155 0.7508 0.5076 0.6711
64 0.5154 0.7521 0.5067 0.6742
65 0.5154 0.7513 0.5069 0.6691
66 0.5154 0.7519 0.5068 0.7072
67 0.5154 0.7519 0.5070 0.6942
68 0.5153 0.7518 0.5073 0.7262
69 0.5151 0.7522 0.5068 0.7112
70 0.5151 0.7512 0.5069 0.7082
71 0.5147 0.7528 0.5073 0.7092
72 0.5150 0.7511 0.5075 0.7012
73 0.5150 0.7522 0.5071 0.6862
74 0.5149 0.7516 0.5069 0.6962
75 0.5145 0.7519 0.5070 0.7032
76 0.5146 0.7522 0.5068 0.6859
77 0.5144 0.7519 0.5072 0.7062
78 0.5148 0.7514 0.5074 0.7262
79 0.5149 0.7520 0.5072 0.7122
80 0.5147 0.7511 0.5071 0.7362
81 0.5146 0.7511 0.5068 0.7122
82 0.5146 0.7527 0.5068 0.7202
83 0.5145 0.7521 0.5067 0.6982
84 0.5144 0.7531 0.5062 0.7102
85 0.5145 0.7515 0.5072 0.7042
86 0.5145 0.7516 0.5068 0.6762
87 0.5143 0.7522 0.5068 0.6942
88 0.5144 0.7525 0.5066 0.7032
89 0.5144 0.7511 0.5069 0.7082
90 0.5143 0.7515 0.5068 0.6912
91 0.5143 0.7528 0.5065 0.6862
92 0.5142 0.7519 0.5067 0.6792
93 0.5143 0.7516 0.5071 0.7012
94 0.5141 0.7517 0.5071 0.6912
95 0.5144 0.7519 0.5064 0.6882
96 0.5140 0.7529 0.5066 0.7062
97 0.5142 0.7519 0.5068 0.6952
98 0.5142 0.7520 0.5065 0.6932
99 0.5142 0.7523 0.5068 0.6862
100 0.5142 0.7528 0.5062 0.7012
101 0.5141 0.7527 0.5068 0.6822
102 0.5138 0.7529 0.5067 0.6922
103 0.5140 0.7515 0.5068 0.6972
104 0.5138 0.7529 0.5066 0.6962
105 0.5141 0.7520 0.5065 0.6962
106 0.5140 0.7527 0.5064 0.6792
107 0.5140 0.7527 0.5064 0.6942
108 0.5138 0.7517 0.5066 0.7262
109 0.5138 0.7513 0.5068 0.7082
110 0.5137 0.7518 0.5063 0.6902
111 0.5137 0.7513 0.5067 0.7082
112 0.5138 0.7526 0.5060 0.6972
113 0.5137 0.7516 0.5067 0.6892
114 0.5137 0.7533 0.5062 0.7042
115 0.5135 0.7525 0.5061 0.6902
116 0.5136 0.7530 0.5060 0.6932
117 0.5137 0.7517 0.5063 0.6892
118 0.5136 0.7519 0.5062 0.7158
119 0.5136 0.7526 0.5060 0.6654
120 0.5137 0.7527 0.5063 0.7092
121 0.5136 0.7523 0.5063 0.7052
122 0.5136 0.7522 0.5061 0.6842
123 0.5134 0.7521 0.5062 0.6952
124 0.5138 0.7527 0.5063 0.6762
125 0.5136 0.7528 0.5059 0.6942
126 0.5135 0.7514 0.5066 0.7172
127 0.5137 0.7533 0.5061 0.6922
128 0.5136 0.7513 0.5060 0.6932
129 0.5136 0.7514 0.5065 0.7102
130 0.5135 0.7535 0.5061 0.7022
131 0.5134 0.7518 0.5059 0.7002
132 0.5136 0.7525 0.5060 0.6902
133 0.5136 0.7529 0.5060 0.6992
134 0.5136 0.7519 0.5062 0.7082
135 0.5133 0.7517 0.5060 0.6912
136 0.5137 0.7527 0.5060 0.6952
137 0.5133 0.7532 0.5058 0.6972
138 0.5134 0.7529 0.5060 0.7022
139 0.5133 0.7537 0.5056 0.7132
140 0.5135 0.7520 0.5055 0.7032
141 0.5136 0.7533 0.5061 0.6852
142 0.5135 0.7531 0.5058 0.7152
143 0.5134 0.7512 0.5057 0.7062
144 0.5134 0.7521 0.5055 0.6962
145 0.5132 0.7518 0.5060 0.6962
146 0.5133 0.7522 0.5064 0.6832
147 0.5134 0.7521 0.5058 0.6906
148 0.5136 0.7522 0.5057 0.7032
149 0.5134 0.7519 0.5056 0.6942
150 0.5133 0.7535 0.5056 0.7052
151 0.5133 0.7527 0.5058 0.6992
152 0.5133 0.7516 0.5058 0.6912
153 0.5133 0.7519 0.5057 0.7252
154 0.5133 0.7529 0.5055 0.6942
155 0.5135 0.7516 0.5058 0.7352
156 0.5134 0.7535 0.5054 0.7072
157 0.5134 0.7515 0.5061 0.7042
158 0.5133 0.7517 0.5056 0.7052
159 0.5133 0.7525 0.5060 0.7192
160 0.5130 0.7516 0.5060 0.6932
161 0.5131 0.7529 0.5056 0.7002
162 0.5133 0.7521 0.5054 0.6912
163 0.5131 0.7519 0.5057 0.6792
164 0.5131 0.7519 0.5059 0.6742
165 0.5133 0.7520 0.5061 0.6902
166 0.5128 0.7514 0.5056 0.6852
167 0.5133 0.7527 0.5054 0.7132
168 0.5130 0.7521 0.5057 0.7122
169 0.5132 0.7518 0.5053 0.6922
170 0.5132 0.7522 0.5057 0.6962
171 0.5131 0.7519 0.5054 0.6932
172 0.5130 0.7518 0.5056 0.7072
173 0.5130 0.7528 0.5052 0.6822
174 0.5130 0.7527 0.5055 0.6962
175 0.5133 0.7521 0.5057 0.6852
176 0.5129 0.7521 0.5059 0.7008
177 0.5131 0.7519 0.5058 0.6962
178 0.5129 0.7534 0.5049 0.7072
179 0.5128 0.7522 0.5052 0.7002
180 0.5131 0.7517 0.5057 0.7032
181 0.5129 0.7530 0.5053 0.6852
182 0.5129 0.7520 0.5054 0.6782
183 0.5128 0.7519 0.5055 0.7022
184 0.5130 0.7519 0.5053 0.6922
185 0.5127 0.7516 0.5058 0.7062
186 0.5127 0.7518 0.5060 0.6892
187 0.5127 0.7519 0.5053 0.6972
188 0.5126 0.7516 0.5054 0.6792
189 0.5129 0.7521 0.5055 0.7242
190 0.5127 0.7521 0.5053 0.7012
<class 'skorch.classifier.NeuralNetBinaryClassifier'>[initialized](
module_=MLP_network(
(output): Sequential(
(0): Linear(in_features=21, out_features=23, bias=True)
(1): ReLU(inplace=True)
(2): Linear(in_features=23, out_features=23, bias=True)
(3): ReLU(inplace=True)
(4): Linear(in_features=23, out_features=23, bias=True)
(5): ReLU(inplace=True)
(6): Linear(in_features=23, out_features=23, bias=True)
(7): ReLU(inplace=True)
(8): Linear(in_features=23, out_features=1, bias=True)
)
),
)
# Persist the tuned MLP's parameters so the final-model notebook can reload them
mlp_params_path = 'final_models/saved_final_MLP_parameters.pkl'
net_final.save_params(mlp_params_path)
# Accuracy of the final MLP on the held-out test set
net_final.score(X_test_MLP, y_test_MLP)
0.7547152018106374
from sklearn.svm import SVC
# Standardising features for the SVM: fit mean/std on the training set only,
# then apply that same transform to both training and test sets.
feature_scaler = StandardScaler()
X_train_SVM = feature_scaler.fit_transform(X_train_SVM)
X_test_SVM = feature_scaler.transform(X_test_SVM)
def svm_objective(trial):
    """Optuna objective for the SVM.

    Samples hyperparameters and returns the mean 5-fold cross-validated
    accuracy on the (standardised) training set.
    """
    # C ranges over 2**-5 .. 2**3, i.e. several orders of magnitude, so
    # sample it on a log scale.
    C = trial.suggest_float('C', 2**(-5), 2**3, log=True)
    kernel = trial.suggest_categorical('kernel', ['poly', 'rbf', 'sigmoid']) # if degree=1, polynomial SVM is linear
    # NOTE(review): 'degree' is sampled for every kernel although only 'poly'
    # uses it; kept unconditional because downstream code indexes
    # best_params['degree'] regardless of the chosen kernel.
    degree = trial.suggest_int('degree', 1, 10) # only relevant to the polynomial function
    gamma = trial.suggest_categorical('gamma', ['scale', 'auto']) # kernel coefficient
    svc_model = SVC(C=C, kernel=kernel, degree=degree, gamma=gamma)
    # n_jobs=-1 runs the 5 folds in parallel — identical scores, less wall time
    cv_accuracy = cross_val_score(svc_model, X_train_SVM, y_train_SVM, cv=5, n_jobs=-1).mean()
    return cv_accuracy
# Tuning SVM hyperparameters using optuna
# No pruner here: each trial reports a single cross-validated score, so
# there are no intermediate values to prune on.
svm_study = optuna.study.create_study(direction='maximize')
svm_study.optimize(svm_objective, n_trials=50)
[I 2022-05-06 19:30:08,647] A new study created in memory with name: no-name-f99acfdd-61a0-42bb-8167-3a0838a723c0 [I 2022-05-06 19:43:31,955] Trial 0 finished with value: 0.6533425139894837 and parameters: {'C': 7.9021787946659385, 'kernel': 'poly', 'degree': 10, 'gamma': 'scale'}. Best is trial 0 with value: 0.6533425139894837. [I 2022-05-06 19:47:00,449] Trial 1 finished with value: 0.6674480373391608 and parameters: {'C': 3.4657527964383283, 'kernel': 'sigmoid', 'degree': 9, 'gamma': 'auto'}. Best is trial 1 with value: 0.6674480373391608. [I 2022-05-06 19:50:51,561] Trial 2 finished with value: 0.6675895004724899 and parameters: {'C': 2.8746881873236987, 'kernel': 'sigmoid', 'degree': 1, 'gamma': 'auto'}. Best is trial 2 with value: 0.6675895004724899. [I 2022-05-06 20:00:47,498] Trial 3 finished with value: 0.7450085210734428 and parameters: {'C': 4.466892951756033, 'kernel': 'rbf', 'degree': 8, 'gamma': 'scale'}. Best is trial 3 with value: 0.7450085210734428. [I 2022-05-06 20:13:35,549] Trial 4 finished with value: 0.7420580687630697 and parameters: {'C': 7.878897038254958, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 3 with value: 0.7450085210734428. [I 2022-05-06 21:24:23,054] Trial 5 finished with value: 0.7517985656465311 and parameters: {'C': 0.3101038315146602, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-06 21:28:29,592] Trial 6 finished with value: 0.6674682638615084 and parameters: {'C': 4.652276441497297, 'kernel': 'sigmoid', 'degree': 7, 'gamma': 'auto'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-06 21:32:39,253] Trial 7 finished with value: 0.6680947142797982 and parameters: {'C': 1.2215380161292997, 'kernel': 'sigmoid', 'degree': 8, 'gamma': 'auto'}. Best is trial 5 with value: 0.7517985656465311. 
[I 2022-05-06 21:39:28,709] Trial 8 finished with value: 0.7350053224036192 and parameters: {'C': 0.4888242507470793, 'kernel': 'poly', 'degree': 5, 'gamma': 'auto'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-06 21:49:18,036] Trial 9 finished with value: 0.7453318493335029 and parameters: {'C': 4.249678870690448, 'kernel': 'rbf', 'degree': 7, 'gamma': 'auto'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-06 23:49:20,404] Trial 10 finished with value: 0.7485045932686296 and parameters: {'C': 2.029025294669129, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-06 23:57:14,968] Trial 11 finished with value: 0.7485450177246006 and parameters: {'C': 1.9928272861729155, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 00:03:17,233] Trial 12 finished with value: 0.7515964842077113 and parameters: {'C': 0.1678356947153634, 'kernel': 'rbf', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 00:09:22,403] Trial 13 finished with value: 0.7502626997443106 and parameters: {'C': 0.09122832138018931, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 08:56:58,068] Trial 14 finished with value: 0.7435130816692449 and parameters: {'C': 5.880766458663762, 'kernel': 'rbf', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 09:04:11,723] Trial 15 finished with value: 0.7501616712772111 and parameters: {'C': 1.2029229419101874, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 09:08:41,499] Trial 16 finished with value: 0.7192021740662862 and parameters: {'C': 0.08776160623221739, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. 
[I 2022-05-07 09:15:52,339] Trial 17 finished with value: 0.7498383368909958 and parameters: {'C': 1.3712195677140833, 'kernel': 'rbf', 'degree': 5, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 09:24:20,401] Trial 18 finished with value: 0.7473729004543075 and parameters: {'C': 2.6616870811029156, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 09:38:36,100] Trial 19 finished with value: 0.7037224520074961 and parameters: {'C': 5.715050100058329, 'kernel': 'poly', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 09:45:24,856] Trial 20 finished with value: 0.7505254301193971 and parameters: {'C': 0.9000200324483667, 'kernel': 'rbf', 'degree': 6, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 09:52:18,767] Trial 21 finished with value: 0.7506264606285484 and parameters: {'C': 0.8274804561731317, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 10:22:44,118] Trial 22 finished with value: 0.748524813664822 and parameters: {'C': 1.8667019707323669, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 10:29:33,189] Trial 23 finished with value: 0.7507881176113974 and parameters: {'C': 0.6909020621827211, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 10:35:35,657] Trial 24 finished with value: 0.750646664688327 and parameters: {'C': 0.11966125716024248, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 10:43:53,411] Trial 25 finished with value: 0.7473324821244918 and parameters: {'C': 2.605392190066274, 'kernel': 'rbf', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. 
[I 2022-05-07 10:51:32,387] Trial 26 finished with value: 0.7489289745004101 and parameters: {'C': 1.663574521071257, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 10:58:15,749] Trial 27 finished with value: 0.7508891664990143 and parameters: {'C': 0.6524553785377406, 'kernel': 'rbf', 'degree': 5, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 11:03:23,545] Trial 28 finished with value: 0.668963682845189 and parameters: {'C': 0.5629596074034624, 'kernel': 'sigmoid', 'degree': 6, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 11:23:33,189] Trial 29 finished with value: 0.7179896976856773 and parameters: {'C': 7.285665606195565, 'kernel': 'poly', 'degree': 5, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 11:35:40,818] Trial 30 finished with value: 0.642106542986945 and parameters: {'C': 3.194912876841269, 'kernel': 'poly', 'degree': 10, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 11:42:24,723] Trial 31 finished with value: 0.7510710500042107 and parameters: {'C': 0.5781467434268718, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 11:49:44,451] Trial 32 finished with value: 0.7498383450592028 and parameters: {'C': 1.3192363805962062, 'kernel': 'rbf', 'degree': 2, 'gamma': 'scale'}. Best is trial 5 with value: 0.7517985656465311. [I 2022-05-07 11:56:20,651] Trial 33 finished with value: 0.7519198083836679 and parameters: {'C': 0.44331135194140114, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 33 with value: 0.7519198083836679. [I 2022-05-07 12:04:32,394] Trial 34 finished with value: 0.747918528507328 and parameters: {'C': 2.2533363225179874, 'kernel': 'rbf', 'degree': 4, 'gamma': 'auto'}. Best is trial 33 with value: 0.7519198083836679. 
[I 2022-05-07 12:08:41,291] Trial 35 finished with value: 0.6676703330480179 and parameters: {'C': 3.593983362473501, 'kernel': 'sigmoid', 'degree': 1, 'gamma': 'scale'}. Best is trial 33 with value: 0.7519198083836679. [I 2022-05-07 12:15:14,752] Trial 36 finished with value: 0.7520006450432993 and parameters: {'C': 0.4055376523017632, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 12:19:25,814] Trial 37 finished with value: 0.6679936796865434 and parameters: {'C': 1.5565429061376612, 'kernel': 'sigmoid', 'degree': 2, 'gamma': 'auto'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 12:30:03,941] Trial 38 finished with value: 0.7444224665223997 and parameters: {'C': 4.9313891316521214, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 12:37:10,778] Trial 39 finished with value: 0.7502222957088569 and parameters: {'C': 1.0616299090784473, 'kernel': 'rbf', 'degree': 4, 'gamma': 'auto'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 12:43:10,052] Trial 40 finished with value: 0.7514752292182643 and parameters: {'C': 0.2145467573267427, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 12:49:26,570] Trial 41 finished with value: 0.7517581554849224 and parameters: {'C': 0.332754409346694, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 12:55:46,554] Trial 42 finished with value: 0.751677318825291 and parameters: {'C': 0.3531787429258304, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 13:03:03,618] Trial 43 finished with value: 0.7505658484492128 and parameters: {'C': 0.9299758160615529, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. 
[I 2022-05-07 13:09:51,134] Trial 44 finished with value: 0.7515156414219246 and parameters: {'C': 0.5025204416048537, 'kernel': 'rbf', 'degree': 1, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 13:14:04,345] Trial 45 finished with value: 0.6675692862024528 and parameters: {'C': 2.2266827246078638, 'kernel': 'sigmoid', 'degree': 2, 'gamma': 'auto'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 13:20:34,854] Trial 46 finished with value: 0.7516773208673426 and parameters: {'C': 0.33995842519331126, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 13:28:03,698] Trial 47 finished with value: 0.7490704335496357 and parameters: {'C': 1.5596363137055245, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 13:33:15,813] Trial 48 finished with value: 0.7440183158970706 and parameters: {'C': 1.0579231493916015, 'kernel': 'poly', 'degree': 3, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993. [I 2022-05-07 13:39:41,308] Trial 49 finished with value: 0.7518187717483614 and parameters: {'C': 0.36784990355309266, 'kernel': 'rbf', 'degree': 4, 'gamma': 'scale'}. Best is trial 36 with value: 0.7520006450432993.
# Hyperparameters of the trial with the highest mean CV accuracy
svm_best_params = svm_study.best_params
print('Hyperparameters for best trial:', svm_best_params)
Hyperparameters for best trial: {'C': 0.4055376523017632, 'kernel': 'rbf', 'degree': 3, 'gamma': 'scale'}
# Visualising the SVM study: a slice plot per hyperparameter and the
# optimisation history across all trials.
optuna.visualization.plot_slice(svm_study, params=['C', 'kernel', 'degree', 'gamma']).show()
optuna.visualization.plot_optimization_history(svm_study).show()
# Refit the SVM on the full (scaled) training set with the best hyperparameters.
# A single seed call suffices — the original seeded twice with the same value,
# which leaves the RNG in an identical state. (SVC without probability=True
# does not appear to consume NumPy's global RNG at all, so this is belt-and-braces.)
np.random.seed(0)
final_svc_model = SVC(C=svm_best_params['C'], kernel=svm_best_params['kernel'], degree=svm_best_params['degree'], gamma=svm_best_params['gamma'])
final_svc_model.fit(X_train_SVM, y_train_SVM)
SVC(C=0.4055376523017632)
import pickle
# Pickling final SVC model so the final-model notebook can reload it
filename = 'final_models/saved_final_SVM_model.pkl'
# BUGFIX: use a context manager so the file handle is flushed and closed
# (the original `pickle.dump(obj, open(filename, 'wb'))` leaked the handle)
with open(filename, 'wb') as svm_file:
    pickle.dump(final_svc_model, svm_file)
# Accuracy of the final SVM on the held-out (scaled) test set
final_svc_model.score(X_test_SVM, y_test_SVM)
0.7520746887966805